In [1]:
import pandas as pd
from urllib.request import urlopen
import json
import plotly.express as px
In [2]:
# Step 1: load the data.

# Download the US county boundaries (GeoJSON) over HTTP and parse the
# response body into a Python dict named `counties`.
geojson_url = 'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json'
with urlopen(geojson_url) as response:
    counties = json.loads(response.read())
In [3]:
# Read the county-level CSV into a DataFrame; `df` is the conventional
# pandas name for "the" data frame.
# NOTE(review): this is an absolute path on one specific machine -- adjust
# it before running the notebook anywhere else.
csv_path = r"C:\Users\Christopher\Default_Apps\Downloads\usa_county_wise.csv"
df = pd.read_csv(csv_path)
In [4]:
# Get familiar with the data: summary statistics (count, mean, std, min,
# quartiles, max) for the columns pandas treats as numeric.
summary_stats = df.describe()
summary_stats
Out[4]:
UID code3 FIPS Lat Long_ Confirmed Deaths
count 6.279200e+05 627920.000000 626040.000000 627920.000000 627920.000000 627920.000000 627920.000000
mean 8.342958e+07 834.491617 33061.684685 36.707212 -88.601474 357.284285 17.536328
std 4.314702e+06 36.492620 18636.156825 9.061572 21.715747 3487.282694 300.991466
min 1.600000e+01 16.000000 60.000000 -14.271000 -174.159600 0.000000 0.000000
25% 8.401811e+07 840.000000 19079.000000 33.895587 -97.790204 0.000000 0.000000
50% 8.402921e+07 840.000000 31014.000000 38.002344 -89.486710 4.000000 0.000000
75% 8.404612e+07 840.000000 47131.000000 41.573069 -82.311265 63.000000 1.000000
max 8.410000e+07 850.000000 99999.000000 69.314792 145.673900 224051.000000 23500.000000
In [5]:
# Column dtypes matter once you start building models (see the
# pre-processing steps later on), so inspect them up front.
column_types = df.dtypes
column_types
Out[5]:
UID                 int64
iso2               object
iso3               object
code3               int64
FIPS              float64
Admin2             object
Province_State     object
Country_Region     object
Lat               float64
Long_             float64
Combined_Key       object
Date               object
Confirmed           int64
Deaths              int64
dtype: object
In [6]:
# Preview the first five rows to read the data like any other table
# (five is the default row count for `head`).
df.head()
Out[6]:
UID iso2 iso3 code3 FIPS Admin2 Province_State Country_Region Lat Long_ Combined_Key Date Confirmed Deaths
0 16 AS ASM 16 60.0 NaN American Samoa US -14.271000 -170.132000 American Samoa, US 1/22/2020 0 0
1 316 GU GUM 316 66.0 NaN Guam US 13.444300 144.793700 Guam, US 1/22/2020 0 0
2 580 MP MNP 580 69.0 NaN Northern Mariana Islands US 15.097900 145.673900 Northern Mariana Islands, US 1/22/2020 0 0
3 63072001 PR PRI 630 72001.0 Adjuntas Puerto Rico US 18.180117 -66.754367 Adjuntas, Puerto Rico, US 1/22/2020 0 0
4 63072003 PR PRI 630 72003.0 Aguada Puerto Rico US 18.360255 -67.175131 Aguada, Puerto Rico, US 1/22/2020 0 0
In [7]:
# Identify and handle missing values.
# There are several strategies, and different situations call for
# different approaches:
#
#   1. Try to recover the missing data from the original source.
#   2. Drop the rows that contain missing values (what this cell does).
#   3. Replace missing values with a substitute such as the column mean,
#      e.g. df.fillna(df.mean(numeric_only=True)) -- not needed for this data.

# Drop every row that has at least one missing value. Re-assigning instead
# of using `inplace=True` keeps the step explicit; `df` is still the name
# that later cells read.
df = df.dropna(axis=0)

# Leave the frame as the cell's last expression so the notebook renders it
# as a rich table instead of the wrapped plain text that print() produces.
df.head(5)
        UID iso2 iso3  code3     FIPS        Admin2 Province_State  \
3  63072001   PR  PRI    630  72001.0      Adjuntas    Puerto Rico   
4  63072003   PR  PRI    630  72003.0        Aguada    Puerto Rico   
5  63072005   PR  PRI    630  72005.0     Aguadilla    Puerto Rico   
6  63072007   PR  PRI    630  72007.0  Aguas Buenas    Puerto Rico   
7  63072009   PR  PRI    630  72009.0      Aibonito    Puerto Rico   

  Country_Region        Lat      Long_                   Combined_Key  \
3             US  18.180117 -66.754367      Adjuntas, Puerto Rico, US   
4             US  18.360255 -67.175131        Aguada, Puerto Rico, US   
5             US  18.459681 -67.120815     Aguadilla, Puerto Rico, US   
6             US  18.251619 -66.126806  Aguas Buenas, Puerto Rico, US   
7             US  18.131361 -66.264131      Aibonito, Puerto Rico, US   

        Date  Confirmed  Deaths  
3  1/22/2020          0       0  
4  1/22/2020          0       0  
5  1/22/2020          0       0  
6  1/22/2020          0       0  
7  1/22/2020          0       0  
In [8]:
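#data binning
##group numeric values into bins (ranges)
##converts numbers into categories
###bins = np.linspace(min(df['price']), max(df['price']), 4)  #linspace needs start, stop and the number of edges; 'price' is a placeholder column (this dataset has none, use e.g. 'Deaths')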
In [9]:
#categorical variables
In [10]:
# County-level choropleth of COVID-19 deaths.
#
# Two preparation steps are needed before the frame can be mapped:
#
# 1. The file holds one row per county *per date*. Plotting all of them
#    sends several hundred thousand rows to the browser and repeats every
#    county many times, so keep only the most recent date.
#    NOTE(review): this assumes `Deaths` is a running total per date, so the
#    latest snapshot is the value to show -- confirm against the data source.
# 2. The GeoJSON features are keyed by 5-character, zero-padded FIPS strings
#    (e.g. "01001"), while `df['FIPS']` is float64 (e.g. 1001.0). Without
#    converting, no location matches a feature and the map stays empty.
report_dates = pd.to_datetime(df['Date'])
map_df = (
    df.loc[report_dates == report_dates.max()]
      .dropna(subset=['FIPS'])  # a missing FIPS cannot be cast to int
      .assign(FIPS=lambda frame: frame['FIPS'].astype(int).astype(str).str.zfill(5))
)

fig = px.choropleth_mapbox(map_df, geojson=counties, locations='FIPS', color='Deaths',
                           color_continuous_scale="RdBu",
                           range_color=(0, 500),  # counts above 500 saturate the scale
                           mapbox_style="carto-positron",
                           zoom=3, center = {"lat": 37.0902, "lon": -95.7129},
                           opacity=0.5,
                           # The coloured column is 'Deaths', so it needs a label
                           # entry for the colour bar / hover text to pick up.
                           labels={'Confirmed':'confirmed cases', 'Deaths':'deaths'}
                          )
In [11]:
# Display the choropleth figure built in the previous cell.
fig.show()
In [ ]: